{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Data processing\n",
    "\n",
    "- Download the Diabete dataset from https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/diabetes\n",
    "- Load the data using sklearn.\n",
    "- Preprocess the data."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.1. Load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of x: (768, 8)\n",
      "Shape of y: (768,)\n"
     ]
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "import numpy\n",
    "\n",
    "x_sparse, y = datasets.load_svmlight_file('diabetes')\n",
    "x = x_sparse.todense()\n",
    "\n",
    "print('Shape of x: ' + str(x.shape))\n",
    "print('Shape of y: ' + str(y.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.2. Partition to training and test sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of x_train: (615, 8)\n",
      "Shape of x_test: (153, 8)\n",
      "Shape of y_train: (615, 1)\n",
      "Shape of y_test: (153, 1)\n"
     ]
    }
   ],
   "source": [
    "# partition the data to training and test sets\n",
    "n = x.shape[0]\n",
    "n_train = int(numpy.ceil(n * 0.8))\n",
    "n_test = n - n_train\n",
    "\n",
    "rand_indices = numpy.random.permutation(n)\n",
    "train_indices = rand_indices[0:n_train]\n",
    "test_indices = rand_indices[n_train:n]\n",
    "\n",
    "x_train = x[train_indices, :]\n",
    "x_test = x[test_indices, :]\n",
    "y_train = y[train_indices].reshape(n_train, 1)\n",
    "y_test = y[test_indices].reshape(n_test, 1)\n",
    "\n",
    "print('Shape of x_train: ' + str(x_train.shape))\n",
    "print('Shape of x_test: ' + str(x_test.shape))\n",
    "print('Shape of y_train: ' + str(y_train.shape))\n",
    "print('Shape of y_test: ' + str(y_test.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.3. Feature scaling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Min-max normalization and standardization are two popular feature scaling methods.\n",
    "\n",
    "- Min-max normalization scales the features to the interval $[0, 1]$.\n",
    "- Standardization makes the features have zero mean and unit variance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.35294118 0.74371859 0.59016393 ... 0.50074514 0.23441503 0.48333333]\n",
      " [0.05882353 0.42713568 0.54098361 ... 0.39642326 0.11656704 0.16666667]\n",
      " [0.47058824 0.91959799 0.52459016 ... 0.34724292 0.25362938 0.18333333]\n",
      " ...\n",
      " [0.29411765 0.6080402  0.59016393 ... 0.39046202 0.07130658 0.15      ]\n",
      " [0.05882353 0.63316583 0.49180328 ... 0.44858422 0.11571307 0.43333333]\n",
      " [0.05882353 0.46733668 0.57377049 ... 0.45305516 0.10119556 0.03333333]]\n",
      "max = \n",
      "[[1. 1. 1. 1. 1. 1. 1. 1.]]\n",
      "min = \n",
      "[[0. 0. 0. 0. 0. 0. 0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "# Min-Max Normalization\n",
    "import numpy\n",
    "\n",
    "d = x.shape[1]\n",
    "xmin = numpy.min(x, axis=0).reshape(1, d)\n",
    "xmax = numpy.max(x, axis=0).reshape(1, d)\n",
    "xnew = (x - xmin) / (xmax - xmin)\n",
    "\n",
    "print(xnew)\n",
    "\n",
    "print('max = ')\n",
    "print(numpy.max(xnew, axis=0))\n",
    "\n",
    "print('min = ')\n",
    "print(numpy.min(xnew, axis=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "xnew = \n",
      "[[ 0.63994726  0.84832379  0.14964075 ...  0.20401252  0.46849198\n",
      "   1.4259954 ]\n",
      " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
      "  -0.19067191]\n",
      " [ 1.23388019  1.94372388 -0.26394125 ... -1.10325559  0.60439732\n",
      "  -0.10558415]\n",
      " ...\n",
      " [ 0.3429808   0.00330087  0.14964075 ... -0.73518952 -0.68519336\n",
      "  -0.27575966]\n",
      " [-0.84488505  0.1597866  -0.47073225 ... -0.24020459 -0.37110101\n",
      "   1.17073215]\n",
      " [-0.84488505 -0.8730192   0.04624525 ... -0.20212882 -0.47378505\n",
      "  -0.87137393]]\n",
      "mean = \n",
      "[[-7.74843153e-17  3.61400724e-18 -1.32724416e-17  7.76288755e-17\n",
      "  -5.49329101e-17  5.12683067e-15  1.92438658e-15  2.19297959e-16]]\n",
      "std = \n",
      "[[1. 1. 1. 1. 1. 1. 1. 1.]]\n"
     ]
    }
   ],
   "source": [
    "# Standardization\n",
    "import numpy\n",
    "\n",
    "d = x.shape[1]\n",
    "mu = numpy.mean(x, axis=0).reshape(1, d)\n",
    "sig = numpy.std(x, axis=0).reshape(1, d)\n",
    "xnew = (x - mu) / sig\n",
    "\n",
    "print('xnew = ')\n",
    "print(xnew)\n",
    "\n",
    "print('mean = ')\n",
    "print(numpy.mean(xnew, axis=0))\n",
    "\n",
    "print('std = ')\n",
    "print(numpy.std(xnew, axis=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### In this tutorial, we use the standardization to trainsform both training and test features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "test mean = \n",
      "[[ 0.04282502  0.14644089  0.13515084  0.07852463  0.12107582  0.05411205\n",
      "  -0.04668225  0.01295459]]\n",
      "test std = \n",
      "[[0.98823714 1.05366321 1.01518686 0.97945751 1.12619899 1.01806036\n",
      "  0.82103702 0.86354754]]\n"
     ]
    }
   ],
   "source": [
    "# Standardization\n",
    "import numpy\n",
    "\n",
    "# calculate mu and sig using the training set\n",
    "d = x_train.shape[1]\n",
    "mu = numpy.mean(x_train, axis=0).reshape(1, d)\n",
    "sig = numpy.std(x_train, axis=0).reshape(1, d)\n",
    "\n",
    "# transform the training features\n",
    "x_train = (x_train - mu) / sig\n",
    "\n",
    "# transform the test features\n",
    "x_test = (x_test - mu) / sig\n",
    "\n",
    "\n",
    "print('test mean = ')\n",
    "print(numpy.mean(x_test, axis=0))\n",
    "\n",
    "print('test std = ')\n",
    "print(numpy.std(x_test, axis=0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add a dimension of all ones"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of x_train: (615, 9)\n",
      "Shape of x_test: (153, 9)\n"
     ]
    }
   ],
   "source": [
    "n_train, d = x_train.shape\n",
    "x_train = numpy.concatenate((x_train, numpy.ones((n_train, 1))), axis=1)\n",
    "\n",
    "n_test, d = x_test.shape\n",
    "x_test = numpy.concatenate((x_test, numpy.ones((n_test, 1))), axis=1)\n",
    "\n",
    "print('Shape of x_train: ' + str(x_train.shape))\n",
    "print('Shape of x_test: ' + str(x_test.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. Logistic regression model\n",
    "\n",
    "The objective function is $Q (w; X, y) = \\frac{1}{n} \\sum_{i=1}^n \\log \\Big( 1 + \\exp \\big( - y_i x_i^T w \\big) \\Big) + \\frac{\\lambda}{2} \\| w \\|_2^2 $."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the objective function value\n",
    "# Inputs:\n",
    "#     w: d-by-1 matrix\n",
    "#     x: n-by-d matrix\n",
    "#     y: n-by-1 matrix\n",
    "#     lam: scalar, the regularization parameter\n",
    "# Return:\n",
    "#     objective function value (scalar)\n",
    "def objective(w, x, y, lam):\n",
    "    n, d = x.shape\n",
    "    yx = numpy.multiply(y, x) # n-by-d matrix\n",
    "    yxw = numpy.dot(yx, w) # n-by-1 matrix\n",
    "    vec1 = numpy.exp(-yxw) # n-by-1 matrix\n",
    "    vec2 = numpy.log(1 + vec1) # n-by-1 matrix\n",
    "    loss = numpy.mean(vec2) # scalar\n",
    "    reg = lam / 2 * numpy.sum(w * w) # scalar\n",
    "    return loss + reg\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initial objective function value = 0.6931471805599453\n"
     ]
    }
   ],
   "source": [
    "# initialize w\n",
    "d = x_train.shape[1]\n",
    "w = numpy.zeros((d, 1))\n",
    "\n",
    "# evaluate the objective function value at w\n",
    "lam = 1E-6\n",
    "objval0 = objective(w, x_train, y_train, lam)\n",
    "print('Initial objective function value = ' + str(objval0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. Numerical optimization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.1. Calculate the full gradient\n",
    "\n",
    "The gradient at $w$ is $g = - \\frac{1}{n} \\sum_{i=1}^n \\frac{y_i x_i }{1 + \\exp ( y_i x_i^T w)} + \\lambda w$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the gradient\n",
    "# Inputs:\n",
    "#     w: d-by-1 matrix\n",
    "#     x: n-by-d matrix\n",
    "#     y: n-by-1 matrix\n",
    "#     lam: scalar, the regularization parameter\n",
    "# Return:\n",
    "#     g: g: d-by-1 matrix, full gradient\n",
    "def gradient(w, x, y, lam):\n",
    "    n, d = x.shape\n",
    "    yx = numpy.multiply(y, x) # n-by-d matrix\n",
    "    yxw = numpy.dot(yx, w) # n-by-1 matrix\n",
    "    vec1 = numpy.exp(yxw) # n-by-1 matrix\n",
    "    vec2 = numpy.divide(yx, 1+vec1) # n-by-d matrix\n",
    "    vec3 = -numpy.mean(vec2, axis=0).reshape(d, 1) # d-by-1 matrix\n",
    "    g = vec3 + lam * w\n",
    "    return g"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.2. Gradient descent\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gradient descent for solving logistic regression\n",
    "# Inputs:\n",
    "#     x: n-by-d matrix\n",
    "#     y: n-by-1 matrix\n",
    "#     lam: scalar, the regularization parameter\n",
    "#     stepsize: scalar\n",
    "#     max_iter: integer, the maximal iterations\n",
    "#     w: d-by-1 matrix, initialization of w\n",
    "# Return:\n",
    "#     w: d-by-1 matrix, the solution\n",
    "#     objvals: a record of each iteration's objective value\n",
    "def grad_descent(x, y, lam, stepsize, max_iter=100, w=None):\n",
    "    n, d = x.shape\n",
    "    objvals = numpy.zeros(max_iter) # store the objective values\n",
    "    if w is None:\n",
    "        w = numpy.zeros((d, 1)) # zero initialization\n",
    "    \n",
    "    for t in range(max_iter):\n",
    "        objval = objective(w, x, y, lam)\n",
    "        objvals[t] = objval\n",
    "        print('Objective value at t=' + str(t) + ' is ' + str(objval))\n",
    "        g = gradient(w, x, y, lam)\n",
    "        w -= stepsize * g\n",
    "    \n",
    "    return w, objvals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Objective value at t=0 is 0.6931471805599453\n",
      "Objective value at t=1 is 0.5857422010224674\n",
      "Objective value at t=2 is 0.5404055860105454\n",
      "Objective value at t=3 is 0.5151957841771834\n",
      "Objective value at t=4 is 0.4990843613352312\n",
      "Objective value at t=5 is 0.4879904229793065\n",
      "Objective value at t=6 is 0.4799953399338087\n",
      "Objective value at t=7 is 0.4740522631313984\n",
      "Objective value at t=8 is 0.46953259598819874\n",
      "Objective value at t=9 is 0.46603377304313864\n",
      "Objective value at t=10 is 0.4632859399675407\n",
      "Objective value at t=11 is 0.4611019171637835\n",
      "Objective value at t=12 is 0.45934833927905316\n",
      "Objective value at t=13 is 0.4579280772961021\n",
      "Objective value at t=14 is 0.45676907664354227\n",
      "Objective value at t=15 is 0.4558170315694747\n",
      "Objective value at t=16 is 0.4550304454901687\n",
      "Objective value at t=17 is 0.45437722328184754\n",
      "Objective value at t=18 is 0.453832273543992\n",
      "Objective value at t=19 is 0.4533757919269716\n",
      "Objective value at t=20 is 0.4529920128789861\n",
      "Objective value at t=21 is 0.4526682892545334\n",
      "Objective value at t=22 is 0.45239440504775347\n",
      "Objective value at t=23 is 0.45216205627508854\n",
      "Objective value at t=24 is 0.4519644547348752\n",
      "Objective value at t=25 is 0.4517960226421246\n",
      "Objective value at t=26 is 0.45165215521586544\n",
      "Objective value at t=27 is 0.4515290345977724\n",
      "Objective value at t=28 is 0.4514234829126703\n",
      "Objective value at t=29 is 0.4513328454371381\n",
      "Objective value at t=30 is 0.4512548971152751\n",
      "Objective value at t=31 is 0.45118776731531307\n",
      "Objective value at t=32 is 0.4511298789374386\n",
      "Objective value at t=33 is 0.45107989888628613\n",
      "Objective value at t=34 is 0.4510366975978158\n",
      "Objective value at t=35 is 0.4509993158208909\n",
      "Objective value at t=36 is 0.4509669372424078\n",
      "Objective value at t=37 is 0.45093886584265724\n",
      "Objective value at t=38 is 0.4509145070974723\n",
      "Objective value at t=39 is 0.45089335232231087\n",
      "Objective value at t=40 is 0.45087496559302137\n",
      "Objective value at t=41 is 0.4508589727878019\n",
      "Objective value at t=42 is 0.4508450523816432\n",
      "Objective value at t=43 is 0.45083292769350547\n",
      "Objective value at t=44 is 0.45082236034155954\n",
      "Objective value at t=45 is 0.450813144706015\n",
      "Objective value at t=46 is 0.4508051032346781\n",
      "Objective value at t=47 is 0.45079808245520686\n",
      "Objective value at t=48 is 0.4507919495814607\n",
      "Objective value at t=49 is 0.45078658962044554\n",
      "Objective value at t=50 is 0.4507819029020031\n",
      "Objective value at t=51 is 0.4507778029662304\n",
      "Objective value at t=52 is 0.4507742147542197\n",
      "Objective value at t=53 is 0.45077107305644964\n",
      "Objective value at t=54 is 0.450768321180417\n",
      "Objective value at t=55 is 0.4507659098051388\n",
      "Objective value at t=56 is 0.45076379599517447\n",
      "Objective value at t=57 is 0.45076194235102607\n",
      "Objective value at t=58 is 0.4507603162762921\n",
      "Objective value at t=59 is 0.4507588893449092\n",
      "Objective value at t=60 is 0.4507576367542996\n",
      "Objective value at t=61 is 0.4507565368523415\n",
      "Objective value at t=62 is 0.45075557072784844\n",
      "Objective value at t=63 is 0.45075472185574805\n",
      "Objective value at t=64 is 0.45075397578941206\n",
      "Objective value at t=65 is 0.45075331989368206\n",
      "Objective value at t=66 is 0.4507527431130392\n",
      "Objective value at t=67 is 0.4507522357701613\n",
      "Objective value at t=68 is 0.4507517893907687\n",
      "Objective value at t=69 is 0.45075139655123575\n",
      "Objective value at t=70 is 0.45075105074592997\n",
      "Objective value at t=71 is 0.45075074627165973\n",
      "Objective value at t=72 is 0.45075047812696917\n",
      "Objective value at t=73 is 0.45075024192432683\n",
      "Objective value at t=74 is 0.4507500338135181\n",
      "Objective value at t=75 is 0.4507498504147798\n",
      "Objective value at t=76 is 0.4507496887604101\n",
      "Objective value at t=77 is 0.4507495462437583\n",
      "Objective value at t=78 is 0.4507494205746389\n",
      "Objective value at t=79 is 0.45074930974034816\n",
      "Objective value at t=80 is 0.45074921197156237\n",
      "Objective value at t=81 is 0.45074912571249637\n",
      "Objective value at t=82 is 0.4507490495947798\n",
      "Objective value at t=83 is 0.45074898241457817\n",
      "Objective value at t=84 is 0.4507489231125511\n",
      "Objective value at t=85 is 0.450748870756287\n",
      "Objective value at t=86 is 0.45074882452490467\n",
      "Objective value at t=87 is 0.4507487836955498\n",
      "Objective value at t=88 is 0.4507487476315486\n",
      "Objective value at t=89 is 0.4507487157720137\n",
      "Objective value at t=90 is 0.45074868762271864\n",
      "Objective value at t=91 is 0.45074866274808756\n",
      "Objective value at t=92 is 0.450748640764158\n",
      "Objective value at t=93 is 0.45074862133239973\n",
      "Objective value at t=94 is 0.4507486041542827\n",
      "Objective value at t=95 is 0.45074858896650144\n",
      "Objective value at t=96 is 0.4507485755367779\n",
      "Objective value at t=97 is 0.450748563660168\n",
      "Objective value at t=98 is 0.45074855315581563\n",
      "Objective value at t=99 is 0.45074854386409285\n"
     ]
    }
   ],
   "source": [
    "# example\n",
    "lam = 1E-6\n",
    "stepsize = 1.0\n",
    "w, objvals_gd = grad_descent(x_train, y_train, lam, stepsize)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3. Stochastic gradient descent (SGD)\n",
    "\n",
    "Define $Q_i (w) = \\log \\Big( 1 + \\exp \\big( - y_i x_i^T w \\big) \\Big) + \\frac{\\lambda}{2} \\| w \\|_2^2 $.\n",
    "\n",
    "The stochastic gradient at $w$ is $g_i = \\frac{\\partial Q_i }{ \\partial w} = -\\frac{y_i x_i }{1 + \\exp ( y_i x_i^T w)} + \\lambda w$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the objective Q_i and the gradient of Q_i\n",
    "# Inputs:\n",
    "#     w: d-by-1 matrix\n",
    "#     xi: 1-by-d matrix\n",
    "#     yi: scalar\n",
    "#     lam: scalar, the regularization parameter\n",
    "# Return:\n",
    "#     obj: scalar, the objective Q_i\n",
    "#     g: d-by-1 matrix, gradient of Q_i\n",
    "def stochastic_objective_gradient(w, xi, yi, lam):\n",
    "    d = xi.shape[0]\n",
    "    yx = yi * xi # 1-by-d matrix\n",
    "    yxw = float(numpy.dot(yx, w)) # scalar\n",
    "    \n",
    "    # calculate objective function Q_i\n",
    "    loss = numpy.log(1 + numpy.exp(-yxw)) # scalar\n",
    "    reg = lam / 2 * numpy.sum(w * w) # scalar\n",
    "    obj = loss + reg\n",
    "    \n",
    "    # calculate stochastic gradient\n",
    "    g_loss = -yx.T / (1 + numpy.exp(yxw)) # d-by-1 matrix\n",
    "    g = g_loss + lam * w # d-by-1 matrix\n",
    "    \n",
    "    return obj, g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SGD for solving logistic regression\n",
    "# Inputs:\n",
    "#     x: n-by-d matrix\n",
    "#     y: n-by-1 matrix\n",
    "#     lam: scalar, the regularization parameter\n",
    "#     stepsize: scalar\n",
    "#     max_epoch: integer, the maximal epochs\n",
    "#     w: d-by-1 matrix, initialization of w\n",
    "# Return:\n",
    "#     w: the solution\n",
    "#     objvals: record of each iteration's objective value\n",
    "def sgd(x, y, lam, stepsize, max_epoch=100, w=None):\n",
    "    n, d = x.shape\n",
    "    objvals = numpy.zeros(max_epoch) # store the objective values\n",
    "    if w is None:\n",
    "        w = numpy.zeros((d, 1)) # zero initialization\n",
    "    \n",
    "    for t in range(max_epoch):\n",
    "        # randomly shuffle the samples\n",
    "        rand_indices = numpy.random.permutation(n)\n",
    "        x_rand = x[rand_indices, :]\n",
    "        y_rand = y[rand_indices, :]\n",
    "        \n",
    "        objval = 0 # accumulate the objective values\n",
    "        for i in range(n):\n",
    "            xi = x_rand[i, :] # 1-by-d matrix\n",
    "            yi = float(y_rand[i, :]) # scalar\n",
    "            obj, g = stochastic_objective_gradient(w, xi, yi, lam)\n",
    "            objval += obj\n",
    "            w -= stepsize * g\n",
    "        \n",
    "        stepsize *= 0.9 # decrease step size\n",
    "        objval /= n\n",
    "        objvals[t] = objval\n",
    "        print('Objective value at epoch t=' + str(t) + ' is ' + str(objval))\n",
    "    \n",
    "    return w, objvals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Objective value at epoch t=0 is 0.5151549707339808\n",
      "Objective value at epoch t=1 is 0.5206758924832361\n",
      "Objective value at epoch t=2 is 0.5043274140498304\n",
      "Objective value at epoch t=3 is 0.5017007852231665\n",
      "Objective value at epoch t=4 is 0.5061405326180962\n",
      "Objective value at epoch t=5 is 0.4999228760733248\n",
      "Objective value at epoch t=6 is 0.49963857212327895\n",
      "Objective value at epoch t=7 is 0.49163172119009857\n",
      "Objective value at epoch t=8 is 0.493069056246553\n",
      "Objective value at epoch t=9 is 0.4907411452841133\n",
      "Objective value at epoch t=10 is 0.4863129094437151\n",
      "Objective value at epoch t=11 is 0.48762427080878673\n",
      "Objective value at epoch t=12 is 0.484085909137715\n",
      "Objective value at epoch t=13 is 0.48001497432115775\n",
      "Objective value at epoch t=14 is 0.4817474833510589\n",
      "Objective value at epoch t=15 is 0.4793509087801017\n",
      "Objective value at epoch t=16 is 0.4767657378561972\n",
      "Objective value at epoch t=17 is 0.4771544041110307\n",
      "Objective value at epoch t=18 is 0.4771127791611159\n",
      "Objective value at epoch t=19 is 0.4756496388899948\n",
      "Objective value at epoch t=20 is 0.47446665345323014\n",
      "Objective value at epoch t=21 is 0.47439128502518074\n",
      "Objective value at epoch t=22 is 0.4737370027226803\n",
      "Objective value at epoch t=23 is 0.47267639310590825\n",
      "Objective value at epoch t=24 is 0.47191785552107074\n",
      "Objective value at epoch t=25 is 0.47171459632925145\n",
      "Objective value at epoch t=26 is 0.471279795938451\n",
      "Objective value at epoch t=27 is 0.47061889005217467\n",
      "Objective value at epoch t=28 is 0.4703274004652669\n",
      "Objective value at epoch t=29 is 0.4700227818881338\n",
      "Objective value at epoch t=30 is 0.46971934031519585\n",
      "Objective value at epoch t=31 is 0.46952963730798025\n",
      "Objective value at epoch t=32 is 0.46917852435722973\n",
      "Objective value at epoch t=33 is 0.46898797150430854\n",
      "Objective value at epoch t=34 is 0.46872260530491755\n",
      "Objective value at epoch t=35 is 0.4685310147594892\n",
      "Objective value at epoch t=36 is 0.46837324457739293\n",
      "Objective value at epoch t=37 is 0.46821106543926755\n",
      "Objective value at epoch t=38 is 0.46806106198558467\n",
      "Objective value at epoch t=39 is 0.4679457506308163\n",
      "Objective value at epoch t=40 is 0.46783286606464813\n",
      "Objective value at epoch t=41 is 0.4677062232966566\n",
      "Objective value at epoch t=42 is 0.4676222500440443\n",
      "Objective value at epoch t=43 is 0.4675353440968213\n",
      "Objective value at epoch t=44 is 0.46745263216418703\n",
      "Objective value at epoch t=45 is 0.46738367127197367\n",
      "Objective value at epoch t=46 is 0.4673205301775398\n",
      "Objective value at epoch t=47 is 0.4672586149059332\n",
      "Objective value at epoch t=48 is 0.46720849138916365\n",
      "Objective value at epoch t=49 is 0.4671636123471883\n",
      "Objective value at epoch t=50 is 0.46711863404923276\n",
      "Objective value at epoch t=51 is 0.4670821320887416\n",
      "Objective value at epoch t=52 is 0.467046620667018\n",
      "Objective value at epoch t=53 is 0.4670162317017715\n",
      "Objective value at epoch t=54 is 0.4669888624203188\n",
      "Objective value at epoch t=55 is 0.4669646171841261\n",
      "Objective value at epoch t=56 is 0.46694191320382683\n",
      "Objective value at epoch t=57 is 0.466921243348373\n",
      "Objective value at epoch t=58 is 0.4669023847131166\n",
      "Objective value at epoch t=59 is 0.46688627243831526\n",
      "Objective value at epoch t=60 is 0.4668713524132149\n",
      "Objective value at epoch t=61 is 0.4668578592205547\n",
      "Objective value at epoch t=62 is 0.466845685682621\n",
      "Objective value at epoch t=63 is 0.4668349829990905\n",
      "Objective value at epoch t=64 is 0.46682510264507626\n",
      "Objective value at epoch t=65 is 0.4668162450150578\n",
      "Objective value at epoch t=66 is 0.46680836512366825\n",
      "Objective value at epoch t=67 is 0.46680120946559034\n",
      "Objective value at epoch t=68 is 0.4667948284468391\n",
      "Objective value at epoch t=69 is 0.46678903780374886\n",
      "Objective value at epoch t=70 is 0.4667837927464804\n",
      "Objective value at epoch t=71 is 0.4667791271762007\n",
      "Objective value at epoch t=72 is 0.46677489073025286\n",
      "Objective value at epoch t=73 is 0.46677107935366513\n",
      "Objective value at epoch t=74 is 0.4667676889952719\n",
      "Objective value at epoch t=75 is 0.46676460626150046\n",
      "Objective value at epoch t=76 is 0.46676182960757917\n",
      "Objective value at epoch t=77 is 0.46675933800830155\n",
      "Objective value at epoch t=78 is 0.4667570985848557\n",
      "Objective value at epoch t=79 is 0.4667550718260116\n",
      "Objective value at epoch t=80 is 0.4667532554999173\n",
      "Objective value at epoch t=81 is 0.46675161850383046\n",
      "Objective value at epoch t=82 is 0.46675014613335175\n",
      "Objective value at epoch t=83 is 0.4667488191929259\n",
      "Objective value at epoch t=84 is 0.46674762144693566\n",
      "Objective value at epoch t=85 is 0.4667465499555729\n",
      "Objective value at epoch t=86 is 0.46674558474066424\n",
      "Objective value at epoch t=87 is 0.46674471334364315\n",
      "Objective value at epoch t=88 is 0.46674393144387744\n",
      "Objective value at epoch t=89 is 0.46674322556952014\n",
      "Objective value at epoch t=90 is 0.46674259180850214\n",
      "Objective value at epoch t=91 is 0.46674202050525815\n",
      "Objective value at epoch t=92 is 0.4667415069618475\n",
      "Objective value at epoch t=93 is 0.4667410443031105\n",
      "Objective value at epoch t=94 is 0.4667406281007143\n",
      "Objective value at epoch t=95 is 0.46674025347420983\n",
      "Objective value at epoch t=96 is 0.46673991613586086\n",
      "Objective value at epoch t=97 is 0.46673961234040834\n",
      "Objective value at epoch t=98 is 0.46673933937055956\n",
      "Objective value at epoch t=99 is 0.46673909374730393\n"
     ]
    }
   ],
   "source": [
    "# example\n",
    "lam = 1E-6\n",
    "stepsize = 0.1\n",
    "w, objvals_sgd = sgd(x_train, y_train, lam, stepsize)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compare GD with SGD\n",
    "\n",
    "Plot objective function values against epochs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'objvals_gd' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-11-9b8d8f4cdfb5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mepochs_gd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobjvals_gd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      7\u001b[0m \u001b[0mepochs_sgd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobjvals_sgd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'objvals_gd' is not defined"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10539d978>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "fig = plt.figure(figsize=(6, 4))\n",
    "\n",
    "epochs_gd = range(len(objvals_gd))\n",
    "epochs_sgd = range(len(objvals_sgd))\n",
    "\n",
    "line0, = plt.plot(epochs_gd, objvals_gd, '--b', LineWidth=4)\n",
    "line1, = plt.plot(epochs_sgd, objvals_sgd, '-r', LineWidth=2)\n",
    "plt.xlabel('Epochs', FontSize=20)\n",
    "plt.ylabel('Objective Value', FontSize=20)\n",
    "plt.xticks(FontSize=16)\n",
    "plt.yticks(FontSize=16)\n",
    "plt.legend([line0, line1], ['GD', 'SGD'], fontsize=20)\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "fig.savefig('compare_gd_sgd.pdf', format='pdf', dpi=1200)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3. Mini-batch SGD\n",
    "\n",
    "Define $Q_I (w) = \\frac{1}{b} \\sum_{i \\in I} \\log \\Big( 1 + \\exp \\big( - y_i x_i^T w \\big) \\Big) + \\frac{\\lambda}{2} \\| w \\|_2^2 $, where $I$ is a set containing $b$ indices randomly drawn from $\\{ 1, \\cdots , n \\}$ without replacement.\n",
    "\n",
    "The stochastic gradient at $w$ is $g_I = \\frac{\\partial Q_I }{ \\partial w} = \\frac{1}{b} \\sum_{i \\in I} \\frac{- y_i x_i }{1 + \\exp ( y_i x_i^T w)} + \\lambda w$."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict class label\n",
    "# Inputs:\n",
    "#     w: d-by-1 matrix\n",
    "#     X: m-by-d matrix\n",
    "# Return:\n",
    "#     f: m-by-1 matrix, the predictions\n",
    "def predict(w, X):\n",
    "    xw = numpy.dot(X, w)\n",
    "    f = numpy.sign(xw)\n",
    "    return f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training classification error is 0.2601626016260163\n"
     ]
    }
   ],
   "source": [
    "# evaluate training error\n",
    "f_train = predict(w, x_train)\n",
    "diff = numpy.abs(f_train - y_train) / 2\n",
    "error_train = numpy.mean(diff)\n",
    "print('Training classification error is ' + str(error_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test classification error is 0.28104575163398693\n"
     ]
    }
   ],
   "source": [
    "# evaluate test error\n",
    "f_test = predict(w, x_test)\n",
    "diff = numpy.abs(f_test - y_test) / 2\n",
    "error_test = numpy.mean(diff)\n",
    "print('Test classification error is ' + str(error_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
